# Global knitr chunk options: show the code in the rendered output, but
# suppress messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
library(tidyverse)
library(here)
library(sf)
library(tmap)
# Read data/sf_trees/sf_trees.csv (path built with here()).
# show_col_types = FALSE silences readr's column-specification message.
sf_trees <- here('data', 'sf_trees', 'sf_trees.csv') %>%
  read_csv(show_col_types = FALSE)
Example 1: Find counts of observations by legal_status and wrangle a bit.
### method 1: group_by() then summarize()
# One row per legal_status value, with the number of rows in each group.
summarize(
  group_by(sf_trees, legal_status),
  tree_count = n()
)
## # A tibble: 10 × 2
## legal_status tree_count
## <chr> <int>
## 1 DPW Maintained 141725
## 2 Landmark tree 42
## 3 Permitted Site 39732
## 4 Planning Code 138.1 required 971
## 5 Private 163
## 6 Property Tree 316
## 7 Section 143 230
## 8 Significant Tree 1648
## 9 Undocumented 8106
## 10 <NA> 54
## method 2: count() plus a few new functions
top_5_status <- sf_trees %>%
  # count() is shorthand for group_by() + summarize(n()); `name` labels the
  # count column directly instead of renaming the default `n` afterwards
  count(legal_status, name = 'tree_count') %>%
  # discard the row that tallies missing legal_status values
  filter(!is.na(legal_status)) %>%
  # move tree_count to the first column
  relocate(tree_count) %>%
  # keep the rows with the 5 largest tree_count values
  slice_max(tree_count, n = 5) %>%
  # order from the highest count to the lowest
  arrange(desc(tree_count))
# Horizontal bar chart of top_5_status, with bars ordered by tree_count.
top_5_status %>%
  ggplot(aes(x = fct_reorder(legal_status, tree_count), y = tree_count)) +
  geom_col(fill = 'darkgreen') +
  labs(
    x = 'Legal Status',
    y = 'Tree Count',
    title = 'Plot of Tree Counts by Legal Status'
  ) +
  coord_flip() +
  theme_minimal()
Example 2: Keep only the observations where legal_status is “Permitted Site” and caretaker is “MTA”, and store the result as permitted_data_df.
Tip: press Shift-Cmd-C to quickly comment or uncomment the selected lines.
# Uncomment to list the distinct legal_status values:
# sf_trees$legal_status %>% unique()
# Both conditions must hold for a row to be kept.
permitted_data_df <- filter(
  sf_trees,
  legal_status == 'Permitted Site' & caretaker == 'MTA'
)
Example 3: Keep only the Blackwood Acacia trees, then keep only the columns legal_status, date, latitude, and longitude, and store the result as blackwood_acacia_df.
blackwood_acacia_df <- sf_trees %>%
  # str_detect() flags the rows whose species text contains 'Blackwood Acacia'
  filter(str_detect(species, 'Blackwood Acacia')) %>%
  select(legal_status, date, latitude, longitude) %>%
  # shorten the coordinate column names
  rename(lat = latitude, long = longitude)
## Quick scatter plot of the locations (long on the x axis, lat on the y axis)
ggplot(blackwood_acacia_df) +
  geom_point(aes(x = long, y = lat), color = 'darkgreen')
Example 4: Use tidyr::separate()
## separate() splits one column into several at a delimiter. Here the
## species column is split at ' :: ' into spp_scientific and spp_common.
sf_trees_sep <- separate(
  sf_trees,
  col = species,
  into = c('spp_scientific', 'spp_common'),
  sep = ' :: '
)
Example 5: Use `tidyr::unite()`, which allows you to combine two columns into one.
# unite() pastes tree_id and legal_status together, separated by '_cool_',
# into a single id_status column.
ex_5 <- unite(
  sf_trees,
  col = 'id_status',
  tree_id, legal_status,
  sep = '_cool_'
)
Step 1: Convert the lat/long columns to spatial points with st_as_sf()
## Build point geometries from the coordinate columns. Rows with a missing
## lat or long are dropped first, and the coordinate reference system is
## declared as EPSG:4326 in the same call.
blackwood_acacia_sf <- blackwood_acacia_df %>%
  drop_na(lat, long) %>%
  st_as_sf(coords = c('long', 'lat'), crs = 4326)
# Plot the point geometries on their own.
ggplot() +
  geom_sf(data = blackwood_acacia_sf, color = 'blue') +
  theme_minimal()
Read in the SF shapefile and add to map
# Read the shapefile, then re-project it to EPSG:4326.
sf_map <- here('data', 'sf_map', 'tl_2017_06075_roads.shp') %>%
  read_sf()
sf_map_transform <- sf_map %>%
  st_transform(crs = 4326)

# Plot the re-projected layer on its own.
ggplot() +
  geom_sf(data = sf_map_transform)
Combine the maps!
# Overlay the Blackwood Acacia points on the map layer. The re-projected
# layer (sf_map_transform, EPSG:4326) is used so both layers share the CRS
# assigned to the points.
ggplot() +
  # NOTE(review): ggplot2 >= 3.4 sets line width via `linewidth`; confirm that
  # `size` still thins the lines on the installed version.
  geom_sf(data = sf_map_transform, size = 0.1, color = 'darkgrey') +
  geom_sf(data = blackwood_acacia_sf, color = 'red', size = 0.5) +
  theme_void() +
  labs(title = 'Blackwood acacias in SF')
# Switch tmap to 'view' mode, then draw the points as dots.
tmap_mode(mode = 'view')
tm_shape(shp = blackwood_acacia_sf) +
  tm_dots()